knitr::opts_chunk$set(
warning=FALSE,
message=FALSE,
results = 'asis',
error = FALSE,
tidy = TRUE,
fig.show = "hold")LPS-MIA base analysis
library(DESeq2)
library(xlsx)
library(Hmisc)
library(dplyr)
library(viridis)
library(pheatmap)
library(variancePartition)
library(RColorBrewer)
library(pcaExplorer)
library(vsn)
library(EnhancedVolcano)
library(rapport)
options(check.names = FALSE)PCA.covar <- function(df, meta, heatmap.r = F, data.r = F, heatmap.p = T, type = "spearman") {
l <- length(colnames(meta))
l2 <- l + 1
pca.cov <- prcomp(t(df))
PC.covs <- pca.cov$x[, 1:l]
PC_cov_correlation <- rcorr(as.matrix(PC.covs), as.matrix(meta), type = type)
PC_variance.explained <- PC_cov_correlation$r[1:l, l2:length(rownames(PC_cov_correlation$r))]
PC_cov.cor.p <- PC_cov_correlation$P[1:l, l2:length(rownames(PC_cov_correlation$r))]
if (heatmap.r)
pheatmap(PC_variance.explained, cluster_rows = F, cluster_cols = F, show_colnames = T,
color = heatmap.color.code)
if (heatmap.p)
pheatmap(PC_cov.cor.p, cluster_rows = F, cluster_cols = F, show_colnames = T,
color = colorRampPalette(c("red", "white"))(50), display_numbers = T)
if (data.r)
return(PC.covs)
}
createDT <- function(DF, caption = "", scrollY = 500) {
data <- DT::datatable(DF, caption = caption, extensions = "Buttons", options = list(dom = "Bfrtip",
buttons = c("copy", "csv", "excel", "pdf", "print"), scrollY = scrollY, scrollX = T,
scrollCollapse = T, paging = F, columnDefs = list(list(className = "dt-center",
targets = "_all"))))
return(data)
}# Subsetting a df to samples of interest (accute)
subset.df <- gfilt.df[colnames(gfilt.df) %in% rownames(filt.meta[filt.meta$Batch %in%
2 & filt.meta$`Cell line` %in% c("OH1.5") & filt.meta$`Accute/chronic` %in% c("Accute"),
])]
subset.df <- subset.df[rowSums(subset.df) >= length(colnames(subset.df)), ] #Also removes ERCC SIs
subset.meta <- filt.meta_clean[rownames(filt.meta_clean) %in% colnames(subset.df),
]
# Sort columns
subset.meta <- subset.meta[sort(rownames(subset.meta)), ]
subset.df <- subset.df[sort(colnames(subset.df))]
all(rownames(subset.meta) == colnames(subset.df))[1] TRUE
# Quick density QC 550 x 450
plot.density(subset.meta, to.color = "Stimulation", to.plot = "`Library size`")
# Quick covariate analysis
cov_for.cor <- cov_for.cor[sort(rownames(cov_for.cor)), ]
cov_for.cor.sub <- cov_for.cor[rownames(cov_for.cor) %in% colnames(subset.df), ]
cov_for.cor.sub <- cov_for.cor.sub[!colnames(cov_for.cor.sub) %in% c("Accute/chronic",
"Cell line", "Batch", "Experiment")]
covar.cor.sub <- rcorr(as.matrix(cov_for.cor.sub), type = "pearson")
covar.cor.sub_1 <- covar.cor.sub$r
# 500x450
pheatmap(covar.cor.sub_1, color = heatmap.color.code)# D100 PCA-covariate analysis 500x500
D100.PCAcovar <- PCA.covar(subset.df, cov_for.cor.sub, data.r = T, type = "pearson")# For correcting the data
subset.meta$"Log(library size)" <- scale(subset.meta$`Library size`)
subset.meta$`Scaled percent. MT` <- scale(subset.meta$"Percent. MT")
# Create a DESeq2 matrix
dds.complex <- DESeqDataSetFromMatrix(countData = as.matrix(subset.df), colData = data.frame(subset.meta),
design = ~Log.library.size. + Percent..ERCC + Stimulation)
# version 2 based on dds object Estimate library size correction scaling factors
dds.complex <- estimateSizeFactors(dds.complex)
vsd.complex <- vst(dds.complex)# Doing QC on the vst-transformed values
meanSdPlot(assay(vsd.complex))
sampleDists <- dist(t(assay(vsd.complex)))
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- colnames(vsd.complex)
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)
annot_df <- data.frame(colData(dds.complex)$Stimulation)
colnames(annot_df) <- c("Stimulation")
rownames(annot_df) <- colnames(dds.complex)
annot_colors <- vector("list", length = 1)
annot_colors$Stimulation[["CTR"]] <- "#00AFBB"
annot_colors$Stimulation[["LPS"]] <- "#E7B800"
# Heatmap intersample-distance
pheatmap(sampleDistMatrix, clustering_distance_rows = sampleDists, clustering_distance_cols = sampleDists,
col = colors, annotation_row = annot_df, annotation_colors = annot_colors)# IQR of samples
norm.df <- assay(vsd.complex)
sOutliers.norm <- IQRoutlier.detector(norm.df, barplot = T)# PCA 550x450
pca.norm <- plotPCA.custom(as.matrix(norm.df), intgroup = c("Cell line", "Stimulation",
"Experiment"), ntop = 50000, returnData = TRUE, metadata = subset.meta)
PoV.norm <- round(100 * attr(pca.norm, "percentVar"))
PCAplot(pca.norm, Condition = "Stimulation", PoV.df = PoV.norm)# Remove batch effects Create a meta df that corresponds to the sorted columns of
# the vsd assay but has covariates as numeric factors
covs_forremBatch <- subset.meta
covs_forremBatch$Stimulation <- as.factor(as.numeric(covs_forremBatch$Stimulation))
covs_forremBatch$Experiment <- as.factor(as.numeric(covs_forremBatch$Experiment))
covs_forremBatch$`Cell line` <- as.factor(as.numeric(covs_forremBatch$`Cell line`))
covs_forremBatch$Batch <- as.factor(as.numeric(covs_forremBatch$Batch))
covs_forremBatch$Replicate <- as.factor(as.numeric(as.factor(covs_forremBatch$Replicate)))
covs_forremBatch <- covs_forremBatch[!colnames(covs_forremBatch) %in% c("Duplicate",
"Accute/chronic")]
batch.rem <- removeBatchEffect(as.matrix(assay(vsd.complex)), covariates = as.matrix(cbind(covs_forremBatch$`Log(library size)`,
covs_forremBatch$`Percent. ERCC`)), design = model.matrix(~covs_forremBatch$Stimulation))# ====PC loadings for covariates====# 500x500
covs_forremBatch <- covs_forremBatch[colnames(covs_forremBatch) %in% colnames(cov_for.cor.sub)]
D100.PCAcovar.postcorrection <- PCA.covar(batch.rem, covs_forremBatch, data.r = T,
type = "pearson", heatmap.r = T)
# PCA (550 x 450)
pca.cor <- plotPCA.custom(as.matrix(batch.rem), intgroup = c("Stimulation", "Cell line",
"Experiment"), ntop = 50000, returnData = TRUE, metadata = subset.meta, pc.1 = 1,
pc.2 = 2)
PoV.cor <- round(100 * attr(pca.cor, "percentVar"))# Inter sample distances post correction
sampleDists.postcor <- dist(t(batch.rem))
sampleDistMatrix.postcor <- as.matrix(sampleDists.postcor)
rownames(sampleDistMatrix.postcor) <- colnames(vsd.complex)
colnames(sampleDistMatrix.postcor) <- NULL
# Heatmap intersample-distance 650x500
pheatmap(sampleDistMatrix.postcor, show_rownames = F, clustering_distance_rows = sampleDists.postcor,
clustering_distance_cols = sampleDists.postcor, col = colors, annotation_row = annot_df,
annotation_colors = annot_colors)dds.complex <- DESeq(dds.complex)
res.complex <- results(dds.complex, name = "Stimulation_LPS_vs_CTR")
summary(res.complex)out of 16950 with nonzero total read count adjusted p-value < 0.1 LFC > 0 (up) : 2, 0.012% LFC < 0 (down) : 16, 0.094% outliers [1] : 0, 0% low counts [2] : 0, 0% (mean count < 1) [1] see ‘cooksCutoff’ argument of ?results [2] see ‘independentFiltering’ argument of ?results
(5.1) Results
#Volcano plot
#700x750
EnhancedVolcano(res.complex,
lab = rownames(res.complex),
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.05,
FCcutoff = 2,
labSize = 6,
legendPosition = 'right')# Grab significant downreg & highest lfc genes
if (length(res.complex[is.na(res.complex$padj), ]$padj) != 0) res.complex[is.na(res.complex$padj),
]$padj <- 1
downreg.genes <- rownames(res.complex[res.complex$log2FoldChange < -2 & res.complex$padj <
0.05, ])
upreg.genes <- rownames(res.complex[res.complex$log2FoldChange > 2 & res.complex$padj <
0.05, ])
# Plot the genes heatmap based on sorted samples for Stimulation:
fact.to.sort <- as.numeric(annot_df$Stimulation)
sorted <- batch.rem[, order(fact.to.sort, decreasing = F)]
sub <- sorted[rownames(sorted) %in% c(upreg.genes, downreg.genes), ]
# 550x1300
pheatmap(sub, cluster_rows = T, cluster_cols = F, annotation_legend = T, show_colnames = F,
annotation_col = annot_df[-2], color = heatmap.color.code, scale = "row", annotation_colors = annot_colors)# Boxplots of DEGs
rownames(sub) <- gsub("\\.", "_", rownames(sub))
rownames(sub) <- gsub("\\-", "_", rownames(sub))
box <- data.frame(scale(t(sub)), Stimulation = annot_df[order(fact.to.sort, decreasing = F),
])
# box.chr <- data.frame(t(sub.ac), Stimulation=sub.meta.ac$Stimulation)
box$Stimulation <- factor(box$Stimulation, levels = c("CTR", "LPS"))
# Boxplots
library(ggplot2)
all.p <- NULL
for (i in rownames(sub)) {
n <- as.factor(i)
p = ggplot(box, aes_string(x = "Stimulation", y = i, fill = "Stimulation")) +
geom_boxplot() + scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07")) +
theme(legend.position = "Type", plot.title = element_text(size = 11)) + theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) + ylab(paste(i, "(residual expression)")) +
theme(axis.text.x = element_blank(), axis.title.x = element_blank())
all.p[[i]] <- p
}
library(ggpubr)
# 700x550
all.genes <- ggarrange(plotlist = all.p[1:length(all.p)], common.legend = T)
all.genes